# Cocitation analysis with the Biblionetwork package ----

## Load Packages -----

# install.packages(c("biblionetwork", "igraph", "qgraph"))

library(biblionetwork)
library(tidyverse)
library(igraph)
library(qgraph)
library(readxl)

## Import and Name ----
# Column names to apply to the sheet (based on Tugce's metadata). The sheet is
# read with col_names = FALSE, so its first row is treated as data.
new_colnames <- c(
  "article_no", "year_citing_work", "cite_no", "year_cited_work",
  "self_cite", "type_work", "title", "author", "author_order"
)

# Read the citation data and label its columns in one step.
solo_and_first_authors <- setNames(
  read_excel("data/data-rmt solo and first authors.xlsx", col_names = FALSE),
  new_colnames
)

# The name vector is not needed once it has been applied.
rm(new_colnames)

# remove self-citations
#solo_and_first_authors <- solo_and_first_authors %>%
#  filter(self_cite == 0)

# Clean Data ----
# Note that updating this will change the numbers assigned to articles

# Return `x` with `value` written into every position where `cond` is TRUE.
# Unlike ifelse(), rows whose condition evaluates to NA (a missing value in one
# of the compared fields) keep their original value instead of being blanked to
# NA. The replacement is coerced to the type of `x`, so a column's type does
# not depend on whether a rule happened to match any row.
replace_where <- function(x, cond, value) {
  hits <- which(cond) # which() ignores NA conditions
  if (length(hits) > 0) {
    x[hits] <- as.vector(value, mode = typeof(x))
  }
  x
}

# The rules below are applied in order; later rules rely on the values
# written by earlier ones.
solo_and_first_authors <- solo_and_first_authors %>%
  # article 063: "inpress" is 2000 (confirmed by checking cites)
  mutate(year_cited_work = replace_where(
    year_cited_work,
    article_no == "063" & author == "broomg" & title == "towardco" &
      year_cited_work == "inpress",
    "2000"
  )) %>%
  # article 110: "inpress" is 2000 (confirmed by checking cites)
  mutate(year_cited_work = replace_where(
    year_cited_work,
    article_no == "110" & author == "grunigj" & title == "organiza" &
      year_cited_work == "inpress",
    "2000"
  )) %>%
  # only checked 2 cites but appears to be the same paper
  mutate(title = replace_where(
    title,
    author == "broomg" & title == "concepts" & year_cited_work == "2000",
    "conceptt"
  )) %>%
  # only checked 2 cites but appears to be the same paper,
  # although the citation is different
  mutate(title = replace_where(
    title,
    author == "broomg" & title == "conceptst" & year_cited_work == "2000",
    "conceptt"
  )) %>%
  # only checked 2 cites but appears to be the same paper,
  # although the citation is different
  mutate(title = replace_where(
    title,
    author == "broomg" & title == "conceptt" & year_cited_work == "1997",
    "towardco"
  )) %>%
  # only checked 2 cites but appears to be the same paper
  mutate(type_work = replace_where(
    type_work,
    author == "broomg" & title == "towardco" & year_cited_work == "1997" &
      type_work == "8",
    "1"
  )) %>%
  # confirmed: consolidate broomg_1997_towardco_1 and broomg_2000_conceptt_8
  # into towardco
  # ... fix name
  mutate(title = replace_where(
    title,
    author == "broomg" & title == "conceptt" & year_cited_work == "2000",
    "towardco"
  )) %>%
  # ... fix type
  mutate(type_work = replace_where(
    type_work,
    author == "broomg" & title == "towardco" & year_cited_work == "2000" &
      type_work == "8",
    "1"
  )) %>%
  # ... fix date
  mutate(year_cited_work = replace_where(
    year_cited_work,
    author == "broomg" & title == "towardco" & year_cited_work == "2000" &
      type_work == "1",
    "1997"
  )) %>%
  # fix: 001  2000  10  inpress(=2000)  0  8  organiza  grunigj  1
  # and  063  1999  19  inpress(=2000)  0  8  organiza  grunigj  1
  # and  110  1999  12  inpress(=2000)  0  8  organiza  grunigj  1
  # and  174  2019  17  1999(=2000)     0  8  organiza  grunigj  1
  mutate(year_cited_work = replace_where(
    year_cited_work,
    author == "grunigj" & title == "organiza" & author_order == 1 &
      type_work == "8",
    "2000"
  )) %>%
  # fix: 110  1999  20  1992  0  8  whatisef  grunigj(=grunigl)  1
  #      054  2001  35  1992  0  8  whatisef  grunigj(=grunigl)  1
  mutate(author = replace_where(
    author,
    author == "grunigj" & title == "whatisef" & author_order == 1 &
      year_cited_work == "1992",
    "grunigl"
  )) %>%
  # fix: 002  2007  22  1992  0  8  whatisan(=whatisef)  grunigj  2
  # and: 002  2007  22  1992  0  8  whatisan(=whatisef)  grunigl  1
  mutate(title = replace_where(
    title,
    title == "whatisan" & year_cited_work == "1992",
    "whatisef"
  )) %>%
  # 101  2017  23  2002  0  8  excellenc  grunigj  2
  # 101  2017  23  2002  0  8  excellenc  grunigl  1
  # 104  2017  27  2002  0  8  excellent  grunigj  2
  # 104  2017  27  2002  0  8  excellent  grunigl  1
  # 105  2017  43  2002  0  8  excellent  dozierd  3
  mutate(title = replace_where(
    title,
    title %in% c("excellenc", "excellent") & year_cited_work == "2002" &
      type_work == "8" & author %in% c("grunigl", "grunigj", "dozierd"),
    "excellen"
  )) %>%
  # 037  2004  4   1999  0  13  guidelin  honl     2
  # 037  2004  4   1999  0  13  guidelin  grunigj  1
  # 040  2006  21  1999  0  13  guidelin  grunigj  1
  # In these citations (articles 3, 37, 40) Hon, L is listed 2nd, but the
  # citation is to the guidelines in which she is first author: should
  # correct. Note that art 91 cites guidelines from 2001 that are a different
  # paper, where Grunig J is first author.
  # ... make grunigj auth 2
  mutate(author_order = replace_where(
    author_order,
    title == "guidelin" & year_cited_work == "1999" & type_work == "13" &
      author == "grunigj" & author_order == 1,
    2
  )) %>%
  # ... make honl auth 1
  mutate(author_order = replace_where(
    author_order,
    title == "guidelin" & year_cited_work == "1999" & type_work == "13" &
      author == "honl" & author_order == 2,
    1
  ))

# Build one text key per cited work (author_year_title_type), keep it next to
# the citing article number, and give every distinct key an integer index.
# Note that the index will change if further data cleaning consolidates
# articles and so changes the number of unique keys.
for_cocitaion <- solo_and_first_authors %>%
  mutate(
    cited_art = paste(author, year_cited_work, title, type_work, sep = "_")
  ) %>%
  select(article_no, cited_art) %>%
  mutate(cited_art_idx = as.integer(factor(cited_art)))

## BiblioNetwork Analysis ----
# Build the co-citation edge list: citing articles are identified by
# `article_no`, cited works by the text key `cited_art`.
# NOTE(review): the threshold passed here is 20, but the output file name below
# and the spinglass plot title both say "min 15" -- confirm which is intended.
# With a threshold of 20 the buckets below 20 presumably stay empty.
cocit <- biblio_cocitation(for_cocitaion,
                           source = "article_no",
                           ref = "cited_art",
                           normalized_weight_only = FALSE,
                           weight_threshold = 20)

# Label every edge with the range its shared-citation count falls into.
# Conditions are tested top-down and the first match wins, so each line only
# needs its lower bound. Counts below 1 are left as NA.
cocit <- cocit %>%
  mutate(bucket = case_when(nb_shared_citations >= 70 ~ "70+",
                            nb_shared_citations >= 60 ~ "60-69",
                            nb_shared_citations >= 50 ~ "50-59",
                            nb_shared_citations >= 40 ~ "40-49",
                            nb_shared_citations >= 30 ~ "30-39",
                            nb_shared_citations >= 20 ~ "20-29",
                            nb_shared_citations >= 10 ~ "10-19",
                            nb_shared_citations >= 5 ~ "5-9",
                            nb_shared_citations >= 1 ~ "1-4"))

cocit$bucket <- as_factor(cocit$bucket)

write_csv(cocit, "results/cocitation_min_15.csv")

# Number of co-cited pairs per bucket, smallest count first.
cocit_summary <- cocit %>%
  group_by(bucket) %>%
  summarise(count = n()) %>%
  arrange(count)

# Build the index <-> article-key lookup for every cited work that appears as
# a node in the co-citation network.
# Per the biblionetwork documentation, `from` and `to` carry the values of the
# column passed as `ref` to biblio_cocitation() -- here the text key
# `cited_art` -- so they are matched on `cited_art`. Coercing them with
# as.integer() only produced NAs, which left the lookup without any real rows.
network_articles <- unique(c(cocit$from, cocit$to))

names <- for_cocitaion %>%
  filter(cited_art %in% network_articles) %>%
  select(idx = cited_art_idx, cited_art) %>%
  distinct() %>%
  arrange(idx)
rm(network_articles)

write_csv(names, "data/names.csv")

# Export the per-bucket counts.
write_csv(cocit_summary, "results/cocitation_summary.csv")

# Export the edge list reduced to the two endpoints and their bucket.
cocit_for_Tugce <- select(cocit, from, to, bucket)
write_csv(cocit_for_Tugce, "results/shared_citations.csv")

# Edge list for plotting: the edge weight is the shared-citation count
# divided by 50; only the endpoints and that weight are kept.
cocit_for_graph <- cocit %>%
  mutate(weight = nb_shared_citations / 50) %>%
  select(from, to, weight)

write_csv(cocit_for_graph, "data/cocit_for_graph.csv")

# Manually prepared node names, read from disk.
names_for_graph_manual <- read_csv("data/names_for_graph_manual.csv")

## Visualize using qgraph ----

# Draw the network on the current device, first with qgraph's default layout
# and then with the "spring" layout.
qgraph(cocit_for_graph, directed = FALSE)
qgraph(cocit_for_graph, directed = FALSE, layout = "spring")

# Save the spring-layout graph as weighted_graph.jpg in the working directory.
# qgraph draws with base graphics, so ggsave() (which saves ggplot objects)
# cannot be used here; qgraph writes the file itself when `filetype` and
# `filename` (given without extension) are supplied.
qgraph(cocit_for_graph, directed = FALSE, layout = "spring",
       filetype = "jpg", filename = "weighted_graph")

# Turn the weighted edge list into an undirected igraph object and print it.
cocit_igraph <- cocit_for_graph %>%
  graph_from_data_frame(directed = FALSE)
cocit_igraph

# Default igraph layout.
plot(cocit_igraph, main = "default layout\n(igraph)")

# layout_with_fr, passed as the layout function.
plot(cocit_igraph,
     layout = layout_with_fr,
     main = "layout_with_fr\n(igraph)")

# The same layout again, this time through an explicit wrapper function.
plot(cocit_igraph,
     layout = function(g) layout_with_fr(g),
     main = "layout_with_fr")

# Interactive Tk plot window.
tkplot(cocit_igraph)

# Detect communities with the spinglass method (5 spins), colour each vertex
# by its community, store a layout_with_kk layout on the graph and plot it.
# NOTE(review): no random seed is set in this script, so the grouping
# presumably varies between runs -- confirm whether that is acceptable.
com <- cluster_spinglass(cocit_igraph, spins = 5)
cocit_igraph <- set_vertex_attr(cocit_igraph, "color",
                                value = com$membership + 1)
cocit_igraph$layout <- layout_with_kk(cocit_igraph)
plot(cocit_igraph, main = "Cluster Spinglass min 15", vertex.label.dist = 1.5)

cocit_igraph
